set.seed(5)

### India analysis -- Online/respondent level

setwd("~/Dropbox/GuessNyhanReifler/DART0023/PNAS/post_accept/replication files/Compliance analyses (Online Appendix B)")
respLevelIndia = readstata13::read.dta13("tipsbalance-online-onlineAttrition-onlineHA2_data.dta")

## Subsetting to complete cases  
respLevelIndia$complete<-complete.cases(respLevelIndia[,c("mean_accuracy_diffw1", "r_tips", "tips")])
respLevelIndia<-respLevelIndia[respLevelIndia$complete,]

## Make age categoricals
respLevelIndia$age1<-(respLevelIndia$agegroup==1)*1
respLevelIndia$age2<-(respLevelIndia$agegroup==2)*1
respLevelIndia$age3<-(respLevelIndia$agegroup==3)*1
respLevelIndia$age4<-(respLevelIndia$agegroup==4)*1
table(respLevelIndia$age1)

## Make caste variable at categorical level
respLevelIndia$caste1=(respLevelIndia$caste==1)*1
respLevelIndia$caste2=(respLevelIndia$caste==2)*1
respLevelIndia$caste3=(respLevelIndia$caste==3)*1
respLevelIndia$caste4=(respLevelIndia$caste==4)*1
respLevelIndia$caste5=(respLevelIndia$caste==5)*1
table(respLevelIndia$caste1)


### Rescale know and int
respLevelIndia$political_interest<-(respLevelIndia$political_interest-1)/4
table(respLevelIndia$political_interest)
respLevelIndia$political_knowledge<-(respLevelIndia$political_knowledge-1)/4
table(respLevelIndia$political_knowledge)


library(ivdesc)

estimateCompliance<-function(Xname, Dname, Zname, data, bootn){
  # Remove missing
  print(Xname)
  data=data[complete.cases(data[,c(Xname, Dname, Dname)]),]
  
  thisX=data[, Xname] #covariate
  thisD=data[, Dname] #treatment
  thisZ=data[, Zname] #instrument
  thisEst<-ivdesc::ivdesc(X=thisX, D=thisD, Z=thisZ, bootn=bootn, balance=TRUE)
  
  outTable<-matrix(ncol=4, nrow=2) # table to store output
  colnames(outTable)<-c("mu_nt", "mu_co", "mu", "pvalue")
  rownames(outTable)<-c("Estimates", "Std. Err.")
  
  outTable[1,1:3]<-thisEst$mu[3:1] # re-ordered to match outable columns
  outTable[2,1:3]<-thisEst$mu_se[3:1]
  outTable[1, 4]<-min(as.numeric(levels(unlist(attr(thisEst, "pvals")[1,2:3]))), na.rm=TRUE) # horrible code to access the p-values
  class(outTable)<-"compliance"
  return(outTable)
}

estimateCompliance(Xname="whatsapp", Dname="r_tips", Zname="tips", data=respLevelIndia, bootn=2000)



## Function for adding points for one variable to an existing plot
plot.compliance<-function(x, y, label, col=c("gray1", "gray50", "gray80"), pch=c(19, 15, 17)){
  start=y
  points(x=x["Estimates",1:3], y=(start:(start+2)), col=col, pch=pch)
  segments(x0 = x["Estimates",1:3]-1.96*x["Std. Err.",1:3], y0=(start:(start+2)), 
           x1 = x["Estimates",1:3]+1.96*x["Std. Err.",1:3], y1=(start:(start+2)), col=col)
  if(x[1,"pvalue"]<.05){
    label<-paste0(label, '*')
  }
  mtext(label, at = start+1, side=2, las=1, cex=1, line=.5)  
}

varList<-list("male", "whatsapp", "hindu", "muslim", "age4", "age3", "age2",
              "age1", "political_knowledge", "political_interest", "college", 
              "caste5", "caste4", "caste3", "caste2", "caste1")
varLab<-list("Male", "WhatsApp use", "Hindu", "Muslim", "Age 61+", "Age 46-60", "Age 31-45",
             "Age 18-30", "Political knowledge", "Political interest", "College", 
             "Other Caste", "General Caste", "Other Backward Class" , "Scheduled Tribe",  "Scheduled Caste")
resultsIndiaOnline<-lapply(varList, estimateCompliance, data=respLevelIndia, Zname="tips", Dname="r_tips", bootn=2000)
names(resultsIndiaOnline)<-varList

### Make the plot for India online sample
end<-length(varList)*4
startVals<-seq(1, end, by=4)

pdf(width=10, height=6, "Compliance-India-Online.pdf")
par(mgp=c(1,0, 0), tcl=.2, mar=c(4, 10, 1, 1))
plot(NULL, xlim=c(0, 1), ylim=c(1, end), xlab="Mean value", ylab="", bty="n", yaxt="n")
for(i in 1:length(varList)){
  print(i)
  plot(resultsIndiaOnline[[varList[[i]]]], startVals[i], label=varLab[i])
}
par(xpd=TRUE)
legend(x=.2, y=-8, pch=c(17, 15, 19),col=c("gray80", "gray50", "gray1"), 
       legend=c("Sample", "Compliers", "Never-takers"), bty="n", lty=c(1, 1, 1), horiz=TRUE)
par(xpd=FALSE)
dev.off()

############ Face to face India analysis

respLevelIndia = readstata13::read.dta13("tipsbalance-ftf-ftfAttrition-ftfHA2_data.dta")

## Subsetting to complete cases  
respLevelIndia$complete<-complete.cases(respLevelIndia[,c("mean_accuracy_diffw1", "r_tips", "tips")])
respLevelIndia<-respLevelIndia[respLevelIndia$complete,]

## Make agegroupVariable
respLevelIndia$age1<-(respLevelIndia$agegroup==1)*1
respLevelIndia$age2<-(respLevelIndia$agegroup==2)*1
respLevelIndia$age3<-(respLevelIndia$agegroup==3)*1
respLevelIndia$age4<-(respLevelIndia$agegroup==4)*1

## Make caste variable
respLevelIndia$caste1=(respLevelIndia$caste==1)*1
respLevelIndia$caste2=(respLevelIndia$caste==2)*1
respLevelIndia$caste3=(respLevelIndia$caste==3)*1
respLevelIndia$caste4=(respLevelIndia$caste==4)*1
respLevelIndia$caste5=(respLevelIndia$caste==5)*1

### R-scale know and int
respLevelIndia$political_interest<-(respLevelIndia$political_interest-1)/4
respLevelIndia$political_knowledge<-(respLevelIndia$political_knowledge-1)/4



varList<-list("male", "whatsapp", "hindu", "muslim", "age4", "age3", "age2",
              "age1", "political_knowledge", "political_interest", "college", 
              "caste5", "caste4", "caste3", "caste2", "caste1")
varLab<-list("Male", "WhatsApp use", "Hindu", "Muslim", "Age 61+", "Age 46-60", "Age 31-45",
             "Age 18-30", "Political knowledge", "Political interest", "College", 
             "Other Caste", "General Caste", "Other Backward Class" , "Scheduled Tribe",  "Scheduled Caste")

resultsIndiaFTF<-lapply(varList, estimateCompliance, data=respLevelIndia, Zname="tips", Dname="r_tips", bootn=2000)
names(resultsIndiaFTF)<-varList
resultsIndiaFTF

### Make the plot for India face-to-face sample
end<-length(varList)*4
startVals<-seq(1, end, by=4)

pdf(width=10, height=6, "Compliance-India-FTF.pdf")
par(mgp=c(1,0, 0), tcl=.2, mar=c(4, 10, 1, 1))
plot(NULL, xlim=c(0, 1), ylim=c(1, end), xlab="Mean value", ylab="", bty="n", yaxt="n")
for(i in 1:length(varList)){
  plot(resultsIndiaFTF[[varList[[i]]]], startVals[i], label=varLab[i])
}
par(xpd=TRUE)
legend(x=.2, y=-8, pch=c(17, 15, 19),col=c("gray80", "gray50", "gray1"), 
       legend=c("Sample", "Compliers", "Never-takers"), bty="n", lty=c(1, 1, 1), horiz=TRUE)
par(xpd=FALSE)
dev.off()

########## US analysis


respLevelUS = readstata13::read.dta13("US-respondent1_pre.dta", convert.factors = FALSE, generate.factors = T)


## Recodign a few variables to fall between 0/1
respLevelUS$polknow_scaled<-(respLevelUS$polknow-1)/4
respLevelUS$polint_scaled<-(respLevelUS$polint-1)/4
respLevelUS$conspiracy_mean_scaled<-(respLevelUS$conspiracy_mean-1)/4
respLevelUS$pol_therm_media_scaled<-(respLevelUS$pol_therm_media-1)/100
respLevelUS$pol_therm_trump_scaled<-(respLevelUS$pol_therm_trump-1)/100
respLevelUS$affect_polar_leanw1<-((respLevelUS$affect_polar+100)/200)

table(respLevelUS$female)
respLevelUS$male=(1-respLevelUS$female)
table(respLevelUS$male)

varList<-list("male", "polknow_scaled", "polint_scaled", "conspiracy_mean_scaled", "agecat4", "agecat3", "agecat2", "agecat1", 
  "college", "pol_therm_media_scaled", "pol_therm_trump_scaled",  "repub_leaners", 
  "dem_leaners", "affect_polar_leanw1")


# Note would like to get mean pre-survey diet but tons of missingess and affective polarization
varLab<-c("Male", "Political knowledge", "Political interest", "Conspiracy predispositions", "Age 60+", "Age 45-59", "Age 30-44", "Age 18-29", 
             "College", "Media feelings", "Trump feelings",  "Republican", 
             "Democrat", "Affective polarization")

# It works!
estimateCompliance(data = respLevelUS, Xname = "totalfakecount18_presurvey", Zname = "tips", Dname="r_tips", bootn=1000)

## Do estimates for US
resultsUS<-lapply(varList, estimateCompliance, data=respLevelUS, Zname="tips", Dname="r_tips", bootn=2000)
names(resultsUS)<-varList



## Make the plot
end<-length(varList)*4
startVals<-seq(1, end, by=4)

pdf(width=10, height=6, "Compliance-US.pdf")
par(mgp=c(1,0, 0), tcl=.2, mar=c(5, 12, 1, 1))
plot(NULL, xlim=c(0, 1), ylim=c(1, end), xlab="Mean value", ylab="", bty="n", yaxt="n")
for(i in 1:length(varList)){
  plot(resultsUS[[varList[[i]]]], startVals[i], label=varLab[i])
}
par(xpd=TRUE)
legend(x=.2, y=-8, pch=c(17, 15, 19),col=c("gray80", "gray50", "gray1"), 
       legend=c("Sample", "Compliers", "Never-takers"), bty="n", lty=c(1, 1, 1), horiz=TRUE)
par(xpd=FALSE)
dev.off()

## In text we discuss differences in exposure to fake news
estimateCompliance(data = respLevelUS, Xname = "totalfakecount18_presurvey", Zname = "tips", Dname="r_tips", bootn=2000)
table(is.na(respLevelUS$totalfakecount18_presurvey))


